# -*- coding: utf-8 -*-
# @Time: 2023/2/8 17:47
# @Author: MinChess
# @File: reword.py
# @Software: PyCharm
from urllib.parse import urljoin

import docx
import requests
from lxml import etree

# Request headers sent with every page fetch: a fixed session cookie and a
# desktop Chrome User-Agent string.
headers = {
    "Cookie": (
        "clickbids=116117; "
        "Hm_lvt_6dfe3c8f195b43b8e667a2a2e5936122=1675151965,1675218073; "
        "Hm_lpvt_6dfe3c8f195b43b8e667a2a2e5936122=1675218073"
    ),
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/108.0.0.0 Safari/537.36"
    ),
}

# Word document shared by the functions below: content_get appends chapter
# headings and paragraphs to it, and catalog_get saves it to disk.
file = docx.Document()

def catalog_get():
    """Fetch the chapter catalog and download every listed chapter.

    Requests the catalog page, walks the ``<dd>`` entries under the element
    with ``id="list"`` in document order, and passes each chapter's title
    and link to ``content_get``. The shared document ``file`` is written to
    ``./mkj.docx`` when the walk ends, including when it is cut short by an
    exception, so chapters collected so far are not lost.

    Raises:
        requests.RequestException: If the catalog request fails, times out,
            or returns an HTTP error status.
    """
    cata_url = 'https://www.biquzw.la/116_116117/'
    # Bound the wait so a stalled connection cannot hang the script.
    response = requests.get(cata_url, headers=headers, timeout=30)
    # Fail loudly on an error page instead of parsing it as an empty catalog.
    response.raise_for_status()
    response.encoding = 'utf-8'
    html = etree.HTML(response.text, parser=etree.HTMLParser(encoding='utf-8'))
    # Query the entry list once; guard against the parser yielding no tree.
    entries = html.xpath('//*[@id="list"]/dl/dd') if html is not None else []

    try:
        for entry in entries:
            links = entry.xpath('./a[@href]')
            if not links:
                # An entry without a link is skipped rather than ending
                # the crawl early.
                continue
            link = links[0]
            href = link.get('href')
            # Fall back to the link text when there is no title attribute.
            title = link.get('title') or ''.join(link.itertext()).strip()
            print(href, title)
            content_get(title, href)
    finally:
        # Save even on failure so already-downloaded chapters are kept.
        file.save("./mkj.docx")
    print("爬取完成")


def content_get(title, url):
    """Fetch one chapter page and append it to the shared document.

    Args:
        title: Chapter title, added to ``file`` as a level-1 heading.
        url: Chapter link taken from the catalog. It is resolved against
            the book's base URL, so a bare file name, a root-relative path
            and a full URL are all accepted.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    content_url = 'https://www.biquzw.la/116_116117/'
    # urljoin gives the same result as concatenation for a bare file name
    # and also handles root-relative or absolute links correctly.
    content_response = requests.get(
        urljoin(content_url, url), headers=headers, timeout=30
    )
    content_response.encoding = 'utf-8'
    html = etree.HTML(content_response.text, parser=etree.HTMLParser(encoding='utf-8'))
    # Guard against the parser yielding no tree (e.g. an empty body).
    content = html.xpath('//*[@id="content"]/text()') if html is not None else []

    # The heading is added only after the page was fetched, so a failed
    # request does not leave a heading with no text in the document.
    file.add_heading(text=title, level=1)
    for paragraph in content:
        file.add_paragraph(paragraph)



# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    catalog_get()
